import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
# Load the accident records. The bare `data.head(5)` expression is a
# notebook-style preview; its result is only displayed in an interactive session.
data = pd.read_csv("data.csv")
data.head(5)

# Bounding box of the accident start coordinates, ordered as
# (min longitude, max longitude, min latitude, max latitude).
_start_lng = data["Start_Lng"]
_start_lat = data["Start_Lat"]
BBox = (_start_lng.min(), _start_lng.max(), _start_lat.min(), _start_lat.max())
BBox
# The dataset contains 2.2 million rows. We definitely will not need all of them, so we will take a sample.
# Random 3000-row sample, restricted to the columns the point maps read.
_SAMPLE_COLUMNS = ["Start_Lng", "Start_Lat", "City", "Visibility(mi)", "Severity"]
data_sample = data.sample(n=3000)[_SAMPLE_COLUMNS]

# Per-point series shared by the Scattergeo figures.
LON = data_sample["Start_Lng"]
LAT = data_sample["Start_Lat"]
TEXT = data_sample["City"]
SEVERITY = data_sample["Severity"]

# Map configuration shared by the plotly figures.
GEO_SCOPE = "usa"
PROJECTION = "albers usa"
LOCATION_MODE = "USA-states"

# Trace styling.
COLORS = "Blues"
MODE = "markers"

# Map colours: land fill, state (subunit) borders, country border.
LAND = "rgb(255, 255, 255)"
UNIT = "rgb(0, 0, 0)"
COUNTRY = "rgb(0, 0, 0)"
# Scatter the start position of every row in `data` (the full frame, not the
# sample); the low alpha lets overlapping points build up darker areas.
data.plot.scatter(
    x="Start_Lng",
    y="Start_Lat",
    alpha=0.4,
    figsize=(25, 15),
)
plt.show()
# Let's divide the accidents by state and draw a kind of heat map.
# Count accidents per state once. Series.value_counts() replaces the deprecated
# top-level pd.value_counts(); the index holds the state codes and the values
# the number of rows recorded for each code.
state_counts = data["State"].value_counts()

# State-level choropleth: one colour per state, scaled by accident count.
fig = go.Figure(
    data=go.Choropleth(
        locations=state_counts.index,
        z=state_counts.astype(float),
        locationmode=LOCATION_MODE,
        colorscale=COLORS,
        colorbar_title="Accidents",
    )
)
fig.update_layout(title_text="Accidents Heat-map", geo_scope=GEO_SCOPE)
fig.show()
sample_data = data.sample(n=1000)
# NOTE(review): sample_data is not referenced by the figure below, which plots
# the LON/LAT/TEXT/SEVERITY series taken from data_sample. Kept in case later
# code relies on it; confirm and remove if it is dead.

# Point map of the sampled accidents, coloured by severity.
fig = go.Figure(
    data=go.Scattergeo(
        locationmode=LOCATION_MODE,
        lon=LON,
        lat=LAT,
        text=TEXT,
        mode=MODE,
        marker=dict(
            size=8,
            opacity=0.8,
            reversescale=True,
            autocolorscale=False,
            symbol="circle",
            # Three components, so the prefix must be rgb(); "rgba(0, 0, 0)"
            # is a malformed colour string (rgba needs a fourth alpha value).
            line=dict(width=1, color="rgb(0, 0, 0)"),
            colorscale=COLORS,
            # Pin the colour range to the severities present in the sample.
            cmin=SEVERITY.min(),
            color=SEVERITY,
            cmax=SEVERITY.max(),
            colorbar_title="Severity",
        ),
    )
)
fig.update_layout(
    title="Severity of accidents",
    geo=dict(
        scope=GEO_SCOPE,
        projection_type=PROJECTION,
        showland=True,
        landcolor=LAND,
        subunitcolor=UNIT,
        countrycolor=COUNTRY,
        countrywidth=3,
        subunitwidth=3,
    ),
)
fig.show()
import plotly.figure_factory as ff

# County reference table; supplies the state and county FIPS codes needed to
# key the county-level choropleth.
df_county = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/laucnty16.csv')
df_county.head(3)

# Split the combined "<county>, <state abbreviation>" label on the first ", ".
# partition() always yields three columns, so a label without a state suffix
# gets an empty state_abbr instead of raising.
_name_parts = df_county['County Name/State Abbreviation'].str.partition(', ')
df_county['county_full'] = _name_parts[0]
df_county['state_abbr'] = _name_parts[2]
# Keep only the text before the first " County" so it can be matched against
# the accident data's County column.
df_county['county_name'] = df_county['county_full'].str.split(' County').str[0]

# Count accidents per (state, county). Counting by county name alone would give
# every same-named county in different states the same combined total.
# Assumes data['State'] holds the same two-letter abbreviations as the
# reference table (it is used with the 'USA-states' location mode elsewhere in
# this file) -- TODO confirm.
# The count column is explicitly named 'County' so the lookup below does not
# depend on the pandas version (value_counts().to_frame() names it 'count'
# from pandas 2.0 on).
county_counts = (
    data.groupby(['State', 'County'])
    .size()
    .rename_axis(['state_abbr', 'county_name'])
    .reset_index(name='County')
)
fips_county_df = df_county[
    ['county_name', 'state_abbr', 'County FIPS Code', 'State FIPS Code']
].merge(county_counts, on=['state_abbr', 'county_name'])

# Build the 5-character FIPS key: zero-padded 2-digit state + 3-digit county.
fips_county_df['State FIPS Code'] = fips_county_df['State FIPS Code'].astype(str).str.zfill(2)
fips_county_df['County FIPS Code'] = fips_county_df['County FIPS Code'].astype(str).str.zfill(3)
fips_county_df['FIPS'] = fips_county_df['State FIPS Code'] + fips_county_df['County FIPS Code']

# Light-to-dark blue ramp, one colour per bin.
colorscale = [
    "#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9", "#9ecae1",
    "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be", "#2171b5", "#1361a9",
    "#08519c", "#0b4083", "#08306b",
]
# One fewer endpoint than colours: N endpoints delimit N + 1 bins.
endpts = list(np.linspace(1, 30000, len(colorscale) - 1))
fips = fips_county_df['FIPS'].tolist()
values = fips_county_df['County'].tolist()

fig = ff.create_choropleth(
    fips=fips,
    values=values,
    scope=[GEO_SCOPE],
    binning_endpoints=endpts,
    colorscale=colorscale,
    show_state_data=False,
    show_hover=True,
    asp=2.9,
    title_text='USA County accidents count',
    legend_title='Accidents count',
)
fig.layout.template = None
fig.show()
# Drop sampled rows that have any missing value, so every marker has a size.
data_sample.dropna(inplace=True)

# Read every per-point array from the cleaned frame. The module-level
# LON/LAT/TEXT/SEVERITY series were taken before the dropna above and still
# contain the dropped rows, so mixing them with data_sample['Visibility(mi)']
# would pair marker sizes with the wrong points.
_severity = data_sample['Severity']

# Point map of the cleaned sample: colour encodes severity, marker size the
# reported visibility.
fig = go.Figure(
    data=go.Scattergeo(
        locationmode=LOCATION_MODE,
        lon=data_sample['Start_Lng'],
        lat=data_sample['Start_Lat'],
        text=data_sample['City'],
        mode=MODE,
        marker=dict(
            size=data_sample['Visibility(mi)'],
            opacity=0.8,
            reversescale=True,
            autocolorscale=False,
            symbol='circle',
            line=dict(
                width=1,
                # Three components, so the prefix must be rgb(); an rgba()
                # string needs a fourth alpha value.
                color='rgb(102, 102, 102)',
            ),
            colorscale=COLORS,
            # cmin must lie below cmax; use the observed severity range, the
            # same way the 'Severity of accidents' map does.
            cmin=_severity.min(),
            color=_severity,
            cmax=_severity.max(),
            colorbar_title="Severity",
        ),
    )
)
fig.update_layout(
    title='Severity & Visibility of accidents',
    geo=dict(
        scope=GEO_SCOPE,
        projection_type=PROJECTION,
        showland=True,
        landcolor=LAND,
        subunitcolor=UNIT,
        countrycolor=COUNTRY,
        countrywidth=0.7,
        subunitwidth=0.7,
    ),
)
fig.show()